{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "510f8ba4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "# Filesystem path of the Qwen2.5-0.5B-Instruct checkpoint used throughout.\n",
    "model_name = \"/home/yuanz/documents/weights/Qwen/Qwen2.5-0.5B-Instruct\"\n",
    "\n",
    "# Tokenizer and causal LM are both read from the same checkpoint; the\n",
    "# \"auto\" settings leave dtype and device placement to transformers.\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_name,\n",
    "    device_map=\"auto\",\n",
    "    torch_dtype=\"auto\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "126e3e11",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['input_ids', 'attention_mask'])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# User message sent to the model; Chinese for \"Introduce yourself.\"\n",
    "prompt = \"介绍一下你自己\"\n",
    "\n",
    "\n",
    "def build_model_inputs(prompt):\n",
    "    messages = [\n",
    "        {\n",
    "            \"role\": \"system\",\n",
    "            \"content\": \"You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\",\n",
    "        },\n",
    "        {\"role\": \"user\", \"content\": prompt},\n",
    "    ]\n",
    "    text = tokenizer.apply_chat_template(\n",
    "        messages, tokenize=False, add_generation_prompt=True\n",
    "    )\n",
    "    model_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\n",
    "    return model_inputs\n",
    "\n",
    "\n",
    "# Tokenize the prompt defined above and list the entries the tokenizer returned.\n",
    "model_inputs = build_model_inputs(prompt)\n",
    "model_inputs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "67fe90e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One forward pass over the prompt tokens (no generation). There is no\n",
    "# torch.no_grad() wrapper here, so autograd is left at its current setting.\n",
    "model_outputs = model(**model_inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fd807c1d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'shape': torch.Size([1, 31, 151936]),\n",
       " 'dtype': torch.bfloat16,\n",
       " 'logits_mean': [4.875, 4.375, 2.984375],\n",
       " 'value': [3.96875,\n",
       "  7.8125,\n",
       "  3.296875,\n",
       "  -0.89453125,\n",
       "  6.65625,\n",
       "  3.734375,\n",
       "  4.5,\n",
       "  6.6875,\n",
       "  8.5625,\n",
       "  6.375],\n",
       " 'grad_enabled': True}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def show_model_outputs(model_outputs):\n",
    "    logits = model_outputs.logits\n",
    "    logits_info = {\n",
    "        \"shape\": logits.shape,\n",
    "        'dtype': logits.dtype,\n",
    "        'logits_mean':logits.mean(axis=1)[0, :3].tolist(),  # Convert to list for better readability\n",
    "        \"value\": logits[0, 0, :10].tolist(),  # Convert to list for better readability\n",
    "        \"grad_enabled\": logits.requires_grad,\n",
    "    }\n",
    "    return logits_info\n",
    "\n",
    "\n",
    "# Display the logits summary for the forward pass stored in model_outputs.\n",
    "show_model_outputs(model_outputs)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4b7d0f7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0fefceb4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5ebb007",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a25d041c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b5d5716d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[-0.00506591796875, -0.006439208984375, -0.00787353515625]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First three values of row 0 of the lm_head weight matrix.\n",
    "model.lm_head.weight.data[0, :3].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1a39c1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape and contents of the lm_head weight, to compare against the input embedding weight.\n",
    "model.lm_head.weight.shape, model.lm_head.weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8dd3de9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shape and contents of the input embedding weight, to compare against the lm_head weight.\n",
    "model.get_input_embeddings().weight.shape, model.get_input_embeddings().weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0af209de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Config flag; per the transformers config docs it states whether the input and output word embeddings are tied.\n",
    "model.config.tie_word_embeddings\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7082b555",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "verl0624",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
